from vllm import LLM, SamplingParams

model_name = "Qwen/Qwen1.5-1.8B-Chat"
# Load the model; cap the context length and use 16-token KV-cache blocks.
llm = LLM(model=model_name,
          max_model_len=10240,
          block_size=16)

# OpenAI-style chat messages; role names must be lowercase to match the chat template.
dialog = [
    {"role": "user", "content": "Hello."},
    {"role": "assistant", "content": "Hello! I am the Qwen assistant."},
    {"role": "user", "content": "Write a poem about autumn."}
]

# Apply the chat template and sample one completion.
sampling_params = SamplingParams(temperature=1.0, top_p=0.9, max_tokens=512)
result = llm.chat(dialog, sampling_params)
print("Prompt:", result[0].prompt)            # prompt after the chat template was applied
print("Output:", result[0].outputs[0].text)   # generated completion
